df <- read_csv("./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df <- df |> select(!c("games_started", "pf_per_g"))
colnames(df)
## [1] "player" "dunk_tot" "dunk_pct" "rim_tot"
## [5] "rim_pct" "rim_asted" "other2pt_tot" "other2pt_pct"
## [9] "other2pt_asted" "3pt_tot" "3pt_pct" "3pt_asted"
## [13] "games" "mp_per_g" "fg_per_g" "fga_per_g"
## [17] "fg_pct" "fg2_per_g" "fg2a_per_g" "fg2_pct"
## [21] "fg3_per_g" "fg3a_per_g" "fg3_pct" "ft_per_g"
## [25] "fta_per_g" "ft_pct" "orb_per_g" "drb_per_g"
## [29] "trb_per_g" "ast_per_g" "stl_per_g" "blk_per_g"
## [33] "tov_per_g" "pts_per_g"
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drop_cols <- c('team_id', 'skip', 'mp', 'pts', 'trb', 'ast')
df_career_stats <- df_career_stats |> select(!drop_cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(drop_cols)
##
## # Now:
## data %>% select(all_of(drop_cols))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_lot_picks <- df_career_stats |> filter(pick_overall < 15)
colnames(df_career_stats)
## [1] "pick_overall" "player" "college_name" "seasons" "g"
## [6] "fg_pct" "fg3_pct" "ft_pct" "mp_per_g" "pts_per_g"
## [11] "trb_per_g" "ast_per_g" "ws" "ws_per_48" "bpm"
## [16] "vorp" "year"
# plotting the distribution of points + rebounds + asts (PRA) per game
df_lot_picks <- df_lot_picks |> mutate(
pra_per_g = pts_per_g + trb_per_g + ast_per_g,
pick_overall = factor(pick_overall)
)
# Group by draft position, and getting the mean
draft_means <- df_lot_picks |> group_by(pick_overall) |>
summarize(avg_mpg = mean(mp_per_g),
avg_ppg = mean(pts_per_g),
avg_trbpg = mean(trb_per_g),
avg_apg = mean(ast_per_g),
avg_prapg = mean(pra_per_g))
print(draft_means, n = 14)
## # A tibble: 14 × 6
## pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.2 18.8 6.52 4.32 29.7
## 2 2 26.9 13.7 4.96 2.88 21.6
## 3 3 30.0 17.6 6.3 3.52 27.4
## 4 4 27.5 12.8 5.39 2.48 20.7
## 5 5 26.0 12.7 4.92 3.55 21.2
## 6 6 23.2 10.2 4.35 2.26 16.8
## 7 7 27.6 13.4 4.88 2.77 21.0
## 8 8 21.1 8.82 3.37 1.77 14.0
## 9 9 24.5 10.9 4.55 2.52 18.0
## 10 10 23.5 10.4 3.61 2.31 16.3
## 11 11 21.1 10.2 3.69 2.16 16.0
## 12 12 24.6 10.6 4.47 2.23 17.3
## 13 13 22.6 10.9 3.99 2.23 17.1
## 14 14 20.3 8.78 3.75 1.3 13.8
df_lot_picks |> ggplot(aes(x = pick_overall, y = pra_per_g)) +
geom_boxplot() +
labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")
df2 <- left_join(df, df_lot_picks, by = c("player"), suffix = c("_college", "_nba"))
df2 <- df2 |>
separate_wider_delim(dunk_tot, delim = "-", names = c("dunk_made", "dunk_attempts"))
df2 <- df2 |>
separate_wider_delim(rim_tot, delim = "-", names = c("rim_made", "rim_attempts"))
df2 <- df2 |>
separate_wider_delim(other2pt_tot, delim = "-", names = c("other2pt_made", "other2pt_attempts"))
df2 <-df2 |>
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made, other2pt_attempts), as.numeric))
df2 <- df2 |> mutate(
vorp_per_g = vorp / g
)
df_top_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.7)))
df_bottom_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.3)))
# metric favors big men
# make the rebound percentile higher
is_not_bust <- function(pick_number, df_top_players = df_top_players, df2 = df2) {
ppg <- df_top_players |> pull(pts_per_g_nba)
apg <- df_top_players |> pull(ast_per_g_nba)
prapg <- df_top_players |> pull(pra_per_g)
df_top_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.8))
rpg <- df_top_rb |> pull(trb_per_g)
df_top_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = median(vorp_per_g))
vorppg <- df_top_vorp |> pull(vorp_per_g)
df <- df2 |> filter(pick_overall == pick_number) |>
filter(pra_per_g >= prapg[[pick_number]] | ast_per_g_nba >= apg[[pick_number]] | trb_per_g_nba >= rpg[[pick_number]] | pts_per_g_nba >= ppg[[pick_number]]) |>
filter(vorp_per_g >= vorppg[[pick_number]])
# must also have played at least most of their career in the nba
df <- df |> filter(seasons >= 4 / 5 * (2023-year))
return(df)
}
is_bust <- function(pick_number, df_bottom_players = df_bottom_players, df2 = df2) {
ppg <- df_bottom_players |> pull(pts_per_g_nba)
apg <- df_bottom_players |> pull(ast_per_g_nba)
prapg <- df_bottom_players |> pull(pra_per_g)
df_bottom_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.4))
rpg <- df_bottom_rb |> pull(trb_per_g)
df_bottom_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = quantile(vorp_per_g, probs = 0.3))
vorppg <- df_bottom_vorp |> pull(vorp_per_g)
# playing less than half the seasons since drafted makes you a bust
df <- df2 |> filter(pick_overall == pick_number) |>
filter((pra_per_g < prapg[[pick_number]] & ast_per_g_nba < apg[[pick_number]] & trb_per_g_nba < rpg[[pick_number]] & pts_per_g_nba < ppg[[pick_number]] & vorp_per_g < vorppg[[pick_number]]) | seasons < 1 / 2 * (2023-year))
return(df)
}
# first overall picks (not bust)
df_pick_1 <- is_not_bust(1, df_top_players, df2)
# first overall picks (bust)
df_pick_1_bust <- is_bust(1, df_bottom_players, df2)
df_pick_1
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony T… 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williamson 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwards 27 27 100.0% 89 129 69.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_1_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Anthony Bennett 53 58 91.4% 100 140 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# second overall
df_pick_2 <- is_not_bust(2, df_top_players, df2)
df_pick_2_bust <- is_bust(2, df_bottom_players, df2)
df_pick_2
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 D'Angelo Russe… 4 4 100.0% 70 110 63.6%
## 2 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 3 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 4 Ja Morant 28 31 90.3% 160 264 60.6%
## 5 Chet Holmgren 57 57 100.0% 105 125 84.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_2_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Derrick Willia… 56 60 93.3% 135 188 71.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# third overall
df_pick_3 <- is_not_bust(3, df_top_players, df2)
df_pick_3_bust <- is_bust(3, df_bottom_players, df2)
df_pick_3
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Bradley Beal 18 20 90.0% 89 137 65.0%
## 2 Joel Embiid 30 30 100.0% 80 99 80.8%
## 3 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 4 Evan Mobley 63 66 95.5% 113 144 78.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_3_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Jahlil Okafor 64 67 95.5% 213 270 78.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourth overall
df_pick_4 <- is_not_bust(4, df_top_players, df2)
df_pick_4_bust <- is_bust(4, df_bottom_players, df2)
df_pick_4
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 2 Jaren Jackson … 31 31 100.0% 61 93 65.6%
## 3 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 4 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# fifth overall
df_pick_5 <- is_not_bust(5, df_top_players, df2)
df_pick_5_bust <- is_bust(5, df_bottom_players, df2)
df_pick_5
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 DeMarcus Cousi… 53 57 93.0% 144 189 76.2%
## 2 De'Aaron Fox 20 21 95.2% 131 203 64.5%
## 3 Trae Young 0 0 0% 105 201 52.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_5_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Thomas Robinson 70 83 84.3% 169 262 64.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# sixth overall
df_pick_6 <- is_not_bust(6, df_top_players, df2)
df_pick_6_bust <- is_bust(6, df_bottom_players, df2)
df_pick_6
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Damian Lillard 13 17 76.5% 98 169 58.0%
## 2 Nerlens Noel 48 50 96.0% 76 99 76.8%
## 3 Marcus Smart 16 18 88.9% 78 110 70.9%
## 4 Buddy Hield 18 22 81.8% 119 178 66.9%
## 5 Onyeka Okongwu 58 61 95.1% 135 186 72.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_6_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ekpe Udoh 30 32 93.8% 78 109 71.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# seventh overall
df_pick_7 <- is_not_bust(7, df_top_players, df2)
df_pick_7_bust <- is_bust(7, df_bottom_players, df2)
df_pick_7
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Julius Randle 37 40 92.5% 132 197 67.0%
## 2 Jamal Murray 18 19 94.7% 77 111 69.4%
## 3 Lauri Markkanen 20 24 83.3% 65 100 65.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_7_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ben McLemore 44 45 97.8% 90 126 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eighth overall
df_pick_8 <- is_not_bust(8, df_top_players, df2)
df_pick_8_bust <- is_bust(8, df_bottom_players, df2)
df_pick_8
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Al-Farouq Aminu 46 48 95.8% 112 173 64.7%
## 2 Kentavious Cal… 15 16 93.8% 63 94 67.0%
## 3 Franz Wagner 11 11 100.0% 63 93 67.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# ninth overall
df_pick_9 <- is_not_bust(9, df_top_players, df2)
df_pick_9_bust <- is_bust(9, df_bottom_players, df2)
df_pick_9
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Gordon Hayward 19 20 95.0% 89 128 69.5%
## 2 Kemba Walker 3 3 100.0% 115 196 58.7%
## 3 Andre Drummond 80 89 89.9% 130 185 70.3%
## 4 Trey Burke 9 9 100.0% 67 105 63.8%
## 5 Jakob Poeltl 32 34 94.1% 199 284 70.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_9_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kevin Knox 18 20 90.0% 65 99 65.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# tenth overall
df_pick_10 <- is_not_bust(10, df_top_players, df2)
df_pick_10_bust <- is_bust(10, df_bottom_players, df2)
df_pick_10
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Paul George 18 22 81.8% 70 106 66.0%
## 2 CJ McCollum 3 3 100.0% 34 63 54.0%
## 3 Elfrid Payton 21 24 87.5% 169 247 68.4%
## 4 Mikal Bridges 35 42 83.3% 109 161 67.7%
## 5 Jalen Smith 49 52 94.2% 114 158 72.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_10_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ziaire Williams 10 11 90.9% 26 49 53.1%
## 2 Johnny Davis 16 19 84.2% 89 143 62.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eleventh overall
df_pick_11 <- is_not_bust(11, df_top_players, df2)
df_pick_11_bust <- is_bust(11, df_bottom_players, df2)
df_pick_11
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Klay Thompson 8 8 100.0% 66 110 60.0%
## 2 Myles Turner 11 13 84.6% 40 54 74.1%
## 3 Domantas Sabon… 22 24 91.7% 157 214 73.4%
## 4 Shai Gilgeous-… 11 11 100.0% 108 182 59.3%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_11_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 James Bouknight 12 12 100.0% 52 79 65.8%
## 2 Jett Howard 6 6 100.0% 29 47 61.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# twelveth overall
df_pick_12 <- is_not_bust(12, df_top_players, df2)
df_pick_12_bust <- is_bust(12, df_bottom_players, df2)
df_pick_12
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Steven Adams 29 33 87.9% 85 129 65.9%
## 2 Miles Bridges 30 35 85.7% 84 128 65.6%
## 3 Tyrese Halibur… 7 8 87.5% 46 62 74.2%
## 4 Jalen Williams 25 27 92.6% 124 186 66.7%
## 5 Dereck Lively … 54 55 98.2% 74 96 77.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_12_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Xavier Henry 17 17 100.0% 60 90 66.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# thirteenth overall
df_pick_13 <- is_not_bust(13, df_top_players, df2)
df_pick_13_bust <- is_bust(13, df_bottom_players, df2)
df_pick_13
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ed Davis 26 27 96.3% 42 50 84.0%
## 2 Kelly Olynyk 25 28 89.3% 152 212 71.7%
## 3 Zach LaVine 21 25 84.0% 51 90 56.7%
## 4 Devin Booker 8 9 88.9% 42 59 71.2%
## 5 Donovan Mitche… 9 13 69.2% 64 116 55.2%
## 6 Tyler Herro 4 5 80.0% 56 84 66.7%
## 7 Jalen Duren 70 76 92.1% 111 152 73.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_13_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kendall Marsha… 0 0 0% 35 53 66.0%
## 2 Jerome Robinson 12 13 92.3% 98 157 62.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)
df_pick_14
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Marcus Morris 31 33 93.9% 114 147 77.6%
## 2 T.J. Warren 37 37 100.0% 192 251 76.5%
## 3 Cameron Payne 3 3 100.0% 53 87 60.9%
## 4 Bam Adebayo 99 105 94.3% 138 185 74.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_14_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Romeo Langford 10 14 71.4% 91 138 65.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# combining dataframes
df_good <- bind_rows(df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5, df_pick_6, df_pick_7, df_pick_8, df_pick_9,
df_pick_10, df_pick_11, df_pick_12, df_pick_13, df_pick_14)
df_busts <- bind_rows(df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust, df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust, df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust, df_pick_13_bust, df_pick_14_bust)
print(df_good, n = 20)
## # A tibble: 64 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony … 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williams… 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwar… 27 27 100.0% 89 129 69.0%
## 8 D'Angelo Russ… 4 4 100.0% 70 110 63.6%
## 9 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 10 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 11 Ja Morant 28 31 90.3% 160 264 60.6%
## 12 Chet Holmgren 57 57 100.0% 105 125 84.0%
## 13 Bradley Beal 18 20 90.0% 89 137 65.0%
## 14 Joel Embiid 30 30 100.0% 80 99 80.8%
## 15 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 16 Evan Mobley 63 66 95.5% 113 144 78.5%
## 17 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 18 Jaren Jackson… 31 31 100.0% 61 93 65.6%
## 19 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 20 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
df_good |> pull(player)
## [1] "John Wall" "Kyrie Irving"
## [3] "Anthony Davis" "Karl-Anthony Towns"
## [5] "Ben Simmons" "Zion Williamson"
## [7] "Anthony Edwards" "D'Angelo Russell"
## [9] "Brandon Ingram" "Lonzo Ball"
## [11] "Ja Morant" "Chet Holmgren"
## [13] "Bradley Beal" "Joel Embiid"
## [15] "Jayson Tatum" "Evan Mobley"
## [17] "Aaron Gordon" "Jaren Jackson Jr."
## [19] "Scottie Barnes" "Keegan Murray"
## [21] "DeMarcus Cousins" "De'Aaron Fox"
## [23] "Trae Young" "Damian Lillard"
## [25] "Nerlens Noel" "Marcus Smart"
## [27] "Buddy Hield" "Onyeka Okongwu"
## [29] "Julius Randle" "Jamal Murray"
## [31] "Lauri Markkanen" "Al-Farouq Aminu"
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"
## [35] "Gordon Hayward" "Kemba Walker"
## [37] "Andre Drummond" "Trey Burke"
## [39] "Jakob Poeltl" "Paul George"
## [41] "CJ McCollum" "Elfrid Payton"
## [43] "Mikal Bridges" "Jalen Smith"
## [45] "Klay Thompson" "Myles Turner"
## [47] "Domantas Sabonis" "Shai Gilgeous-Alexander"
## [49] "Steven Adams" "Miles Bridges"
## [51] "Tyrese Haliburton" "Jalen Williams"
## [53] "Dereck Lively II" "Ed Davis"
## [55] "Kelly Olynyk" "Zach LaVine"
## [57] "Devin Booker" "Donovan Mitchell"
## [59] "Tyler Herro" "Jalen Duren"
## [61] "Marcus Morris" "T.J. Warren"
## [63] "Cameron Payne" "Bam Adebayo"
good_list <- df_good |> pull(player)
df_busts |> pull(player)
## [1] "Anthony Bennett" "Derrick Williams" "Jahlil Okafor" "Thomas Robinson"
## [5] "Ekpe Udoh" "Ben McLemore" "Kevin Knox" "Ziaire Williams"
## [9] "Johnny Davis" "James Bouknight" "Jett Howard" "Xavier Henry"
## [13] "Kendall Marshall" "Jerome Robinson" "Romeo Langford"
bust_list <- df_busts |> pull(player)
plot_good <- ggplot(df_good, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(color = "green", size = 4, alpha = 0.5) +
geom_label_repel(data = df_good, aes(label = player), size = 1.5, max.overlaps = 20) +
labs(title = "CBB Shot Selection for Good Value NBA Lottery Picks", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g, label = player)) +
geom_point(color = "red", size = 4, alpha = 0.5) +
geom_label_repel(size = 3) +
labs(title = "CBB Shot Selection for NBA Lottery Busts", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
geom_point(data = df_busts, aes(color = "Bad value"), size = 4, alpha = 0.5) +
geom_point(data = df_good, aes(color = "Good value"), size = 4, alpha = 0.5) +
labs(title = "CBB Shot Selection for NBA Lottery Picks",
x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game", color = "Value") +
scale_color_manual(values = c("Bad value" = "red", "Good value" = "green", "Average value" = "grey")) +
theme_bw()
plot_combined
plot_busts
plot_good
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
colnames(df2)
## [1] "player" "dunk_made" "dunk_attempts"
## [4] "dunk_pct" "rim_made" "rim_attempts"
## [7] "rim_pct" "rim_asted" "other2pt_made"
## [10] "other2pt_attempts" "other2pt_pct" "other2pt_asted"
## [13] "3pt_tot" "3pt_pct" "3pt_asted"
## [16] "games" "mp_per_g_college" "fg_per_g"
## [19] "fga_per_g" "fg_pct_college" "fg2_per_g"
## [22] "fg2a_per_g" "fg2_pct" "fg3_per_g"
## [25] "fg3a_per_g" "fg3_pct_college" "ft_per_g"
## [28] "fta_per_g" "ft_pct_college" "orb_per_g"
## [31] "drb_per_g" "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g" "blk_per_g" "tov_per_g"
## [37] "pts_per_g_college" "pick_overall" "college_name"
## [40] "seasons" "g" "fg_pct_nba"
## [43] "fg3_pct_nba" "ft_pct_nba" "mp_per_g_nba"
## [46] "pts_per_g_nba" "trb_per_g_nba" "ast_per_g_nba"
## [49] "ws" "ws_per_48" "bpm"
## [52] "vorp" "year" "pra_per_g"
## [55] "vorp_per_g"
df3 <- column_to_rownames(df2, var = "player")
df_cbb <- df3 |> select(
dunk_made, dunk_attempts, dunk_pct, rim_made, rim_attempts, rim_pct, rim_asted,
other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted, fg2_pct, fg3_per_g, fg3a_per_g,
`3pt_asted`, games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
) |> rename(fg3_asted = `3pt_asted`) |>
mutate(dunk_pct = parse_number(dunk_pct) / 100, # changing to percentages
rim_pct = parse_number(rim_pct) / 100,
rim_asted = parse_number(rim_asted) / 100,
other2pt_pct = parse_number(other2pt_pct) / 100,
other2pt_asted = parse_number(other2pt_asted) / 100,
fg3_asted = parse_number(fg3_asted) / 100,
fg3_pct_per_g = fg3_per_g / fg3a_per_g)
df_cbb <- df_cbb |>
mutate(fg3_pct_per_g = coalesce(fg3_pct_per_g, 0)) |>
relocate(fg3_pct_per_g, .after = fg3_asted)
to_per_game <- function(x, games) {
x <- x / games
return(x)
}
df_cbb <- df_cbb |> # making everything per game
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made,
other2pt_attempts), function(x) to_per_game(x, games)))
colSums(is.na(df_cbb))
## dunk_made dunk_attempts dunk_pct rim_made
## 0 0 0 0
## rim_attempts rim_pct rim_asted other2pt_made
## 0 0 0 0
## other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## 0 0 0 0
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g
## 0 0 0 0
## games ft_per_g fta_per_g ast_per_g_college
## 0 0 0 0
## orb_per_g drb_per_g stl_per_g blk_per_g
## 0 0 0 0
## tov_per_g pts_per_g_college
## 0 0
Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r
df_cbb_scaled <- as_tibble(scale(df_cbb |> select(-games)))
df_cbb_scaled
## # A tibble: 165 × 25
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.338 0.359 0.125 0.886 1.15 -0.582 -0.936
## 2 -0.789 -0.813 0.332 -0.925 -1.11 1.48 -1.47
## 3 1.25 1.23 0.325 1.46 1.11 1.46 0.662
## 4 0.599 0.655 0.0208 0.478 0.382 0.591 0.733
## 5 1.13 1.15 0.215 1.44 1.18 1.16 0.633
## 6 0.246 0.233 0.270 0.0605 -0.0262 0.521 -0.0688
## 7 -0.560 -0.589 0.387 0.233 0.367 -0.443 0.0432
## 8 0.102 0.0619 0.408 -0.246 -0.174 -0.443 0.0668
## 9 -0.635 -0.661 0.353 -0.687 -0.754 0.228 0.615
## 10 -0.619 -0.573 -0.560 -0.839 -0.861 -0.261 -0.623
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## # fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## # stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)
corr_matrix <- cor(df_cbb_scaled)
colnames(corr_matrix)
## [1] "dunk_made" "dunk_attempts" "dunk_pct"
## [4] "rim_made" "rim_attempts" "rim_pct"
## [7] "rim_asted" "other2pt_made" "other2pt_attempts"
## [10] "other2pt_pct" "other2pt_asted" "fg2_pct"
## [13] "fg3_per_g" "fg3a_per_g" "fg3_asted"
## [16] "fg3_pct_per_g" "ft_per_g" "fta_per_g"
## [19] "ast_per_g_college" "orb_per_g" "drb_per_g"
## [22] "stl_per_g" "blk_per_g" "tov_per_g"
## [25] "pts_per_g_college"
ggcorrplot(corr_matrix, method = "square")
ggcorrplot(corr_matrix, method = "square", hc.order = TRUE,
type = "lower", tl.cex = 7, title = "Correlations between different college statistics")
Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f
df_cbb.sample <- df_cbb |> sample_frac(1, replace = FALSE)
head(df_cbb.sample)
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Anthony Bennett 1.5142857 1.6571429 0.914 2.857143 4.000000
## Marquese Chriss 1.5294118 1.9117647 0.800 3.000000 4.264706
## Patrick Patterson 0.6701031 0.7216495 0.929 1.402062 1.701031
## Jaren Jackson Jr. 0.8857143 0.8857143 1.000 1.742857 2.657143
## Cason Wallace 0.3437500 0.3437500 1.000 1.625000 2.281250
## Bradley Beal 0.4864865 0.5405405 0.900 2.405405 3.702703
## rim_pct rim_asted other2pt_made other2pt_attempts
## Anthony Bennett 0.714 0.620 1.6000000 3.685714
## Marquese Chriss 0.703 0.529 1.6470588 3.911765
## Patrick Patterson 0.824 0.647 0.5154639 1.309278
## Jaren Jackson Jr. 0.656 0.459 0.5714286 1.228571
## Cason Wallace 0.712 0.096 1.3437500 3.500000
## Bradley Beal 0.650 0.348 0.6216216 1.891892
## other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Anthony Bennett 0.434 0.768 0.587 1.0 2.7
## Marquese Chriss 0.421 0.589 0.568 0.6 1.8
## Patrick Patterson 0.394 0.600 0.604 0.2 0.8
## Jaren Jackson Jr. 0.465 0.600 0.596 1.1 2.7
## Cason Wallace 0.384 0.093 0.514 1.4 4.0
## Bradley Beal 0.329 0.304 0.541 1.7 5.0
## fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Anthony Bennett 0.972 0.3703704 35 3.5 5.1
## Marquese Chriss 0.952 0.3333333 34 2.6 3.8
## Patrick Patterson 0.957 0.2500000 97 3.2 4.3
## Jaren Jackson Jr. 0.974 0.4074074 35 3.0 3.8
## Cason Wallace 0.818 0.3500000 32 1.7 2.2
## Bradley Beal 0.905 0.3400000 37 3.6 4.7
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Anthony Bennett 1.0 2.5 5.7 0.7 1.2
## Marquese Chriss 0.8 2.5 2.9 0.9 1.6
## Patrick Patterson 1.5 2.9 5.3 0.7 1.6
## Jaren Jackson Jr. 1.1 1.5 4.3 0.6 3.0
## Cason Wallace 4.3 0.9 2.8 2.0 0.5
## Bradley Beal 2.2 1.4 5.4 1.4 0.8
## tov_per_g pts_per_g_college
## Anthony Bennett 1.9 16.1
## Marquese Chriss 2.0 13.7
## Patrick Patterson 1.6 16.1
## Jaren Jackson Jr. 1.8 10.9
## Cason Wallace 2.1 11.7
## Bradley Beal 2.1 14.8
df_cbb.pca <- PCA(df_cbb.sample |> select(-games), scale.unit = TRUE, graph = FALSE)
fviz_eig(df_cbb.pca, addlabels = TRUE, main = "Statistics Represented in Lower Dimensional Components")
fviz_pca_var(df_cbb.pca, col.var = "cos2", gradient.cols = c("lightblue", "black"), repel = TRUE)
var <- get_pca_var(df_cbb.pca)
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) +
labs(title = "Quality of Representation to PCA Dimensions 1 and 2")
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")
summary(df_cbb.pca)
##
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 7.916 5.341 2.094 1.495 1.261 1.201 1.002
## % of var. 31.663 21.364 8.376 5.978 5.045 4.803 4.008
## Cumulative % of var. 31.663 53.027 61.403 67.381 72.426 77.229 81.237
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.967 0.751 0.556 0.446 0.420 0.327 0.267
## % of var. 3.867 3.006 2.225 1.786 1.679 1.310 1.067
## Cumulative % of var. 85.104 88.110 90.335 92.121 93.800 95.110 96.177
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.258 0.213 0.164 0.141 0.072 0.062 0.024
## % of var. 1.032 0.850 0.655 0.563 0.286 0.249 0.097
## Cumulative % of var. 97.209 98.059 98.714 99.277 99.564 99.813 99.910
## Dim.22 Dim.23 Dim.24 Dim.25
## Variance 0.011 0.006 0.004 0.002
## % of var. 0.046 0.023 0.014 0.007
## Cumulative % of var. 99.955 99.979 99.993 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2
## Anthony Bennett | 4.389 | 3.010 0.694 0.470 | 0.997 0.113 0.052 |
## Marquese Chriss | 4.114 | 2.704 0.560 0.432 | 0.402 0.018 0.010 |
## Patrick Patterson | 4.401 | 2.766 0.586 0.395 | -1.640 0.305 0.139 |
## Jaren Jackson Jr. | 4.049 | 1.503 0.173 0.138 | -1.816 0.374 0.201 |
## Cason Wallace | 4.400 | -2.425 0.450 0.304 | -0.572 0.037 0.017 |
## Bradley Beal | 2.297 | -0.965 0.071 0.176 | 0.450 0.023 0.038 |
## Markelle Fultz | 7.797 | -2.386 0.436 0.094 | 6.004 4.091 0.593 |
## Evan Turner | 4.173 | -1.376 0.145 0.109 | 0.802 0.073 0.037 |
## Steven Adams | 6.140 | 3.716 1.057 0.366 | -2.840 0.915 0.214 |
## John Henson | 6.097 | 3.263 0.815 0.286 | -2.676 0.812 0.193 |
## Dim.3 ctr cos2
## Anthony Bennett 2.370 1.626 0.292 |
## Marquese Chriss 1.318 0.503 0.103 |
## Patrick Patterson 0.330 0.031 0.006 |
## Jaren Jackson Jr. 0.874 0.221 0.047 |
## Cason Wallace -0.474 0.065 0.012 |
## Bradley Beal -0.187 0.010 0.007 |
## Markelle Fultz 2.275 1.498 0.085 |
## Evan Turner -1.019 0.301 0.060 |
## Steven Adams -2.476 1.774 0.163 |
## John Henson -0.607 0.107 0.010 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## dunk_made | 0.835 8.802 0.697 | 0.336 2.117 0.113 | -0.025 0.029
## dunk_attempts | 0.829 8.675 0.687 | 0.338 2.140 0.114 | -0.025 0.030
## dunk_pct | 0.334 1.412 0.112 | -0.120 0.269 0.014 | 0.228 2.485
## rim_made | 0.584 4.305 0.341 | 0.672 8.468 0.452 | -0.088 0.370
## rim_attempts | 0.451 2.571 0.203 | 0.740 10.243 0.547 | -0.093 0.410
## rim_pct | 0.724 6.625 0.524 | -0.148 0.409 0.022 | -0.001 0.000
## rim_asted | 0.767 7.441 0.589 | -0.354 2.351 0.126 | 0.182 1.578
## other2pt_made | 0.106 0.141 0.011 | 0.601 6.772 0.362 | 0.560 14.964
## other2pt_attempts | 0.102 0.131 0.010 | 0.651 7.924 0.423 | 0.484 11.203
## other2pt_pct | 0.007 0.001 0.000 | -0.045 0.039 0.002 | 0.359 6.140
## cos2
## dunk_made 0.001 |
## dunk_attempts 0.001 |
## dunk_pct 0.052 |
## rim_made 0.008 |
## rim_attempts 0.009 |
## rim_pct 0.000 |
## rim_asted 0.033 |
## other2pt_made 0.313 |
## other2pt_attempts 0.235 |
## other2pt_pct 0.129 |
pca2 <- prcomp(df_cbb.sample, center = TRUE, scale = TRUE)
summary(pca2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion 0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion 0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion 0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
## PC22 PC23 PC24 PC25 PC26
## Standard deviation 0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion 0.9991 0.99957 0.99980 0.99993 1.00000
df_cluster <- as.data.frame(-pca2$x[, 1:2])
df_cluster
## PC1 PC2
## Anthony Bennett -3.08683149 0.9658417837
## Marquese Chriss -2.77746275 0.4521718014
## Patrick Patterson -2.53963602 -2.1109010563
## Jaren Jackson Jr. -1.49089242 -1.7016335898
## Cason Wallace 2.34104930 -0.2152815400
## Bradley Beal 0.88415187 0.6225122311
## Markelle Fultz 2.05054203 6.2356705555
## Evan Turner 1.49673522 0.4578015058
## Steven Adams -3.66839570 -2.7680962536
## John Henson -2.97155356 -3.1904829161
## Malik Monk 2.28778612 1.6797126865
## Johnny Davis 1.33669032 -0.1478916084
## Stanley Johnson 1.16931864 1.1296582673
## Ben Simmons -3.55778734 6.3959006442
## Ochai Agbaji 1.65972813 -3.1665060860
## Alec Burks 0.74320461 2.0264013166
## Ja Morant 2.49001353 3.1186017067
## Marcus Morris -0.73895771 -2.6323130400
## Chris Duarte 1.92568686 -1.3515954340
## Trey Lyles -2.26213434 -2.3189123450
## Wes Johnson -1.50316126 1.3047269862
## Willie Cauley-Stein -2.65175786 -4.0213075524
## Paolo Banchero -0.71365922 2.7656057765
## Marcus Smart 2.59113529 1.9709358714
## Alex Len -3.60602390 -2.2253867363
## Jett Howard 2.91653599 -1.5725624278
## Greg Monroe -0.55772278 0.6192425231
## Derrick Williams -1.03830203 1.0304451116
## Jordan Hawkins 3.13721836 -2.6918367917
## Andrew Wiggins -0.36542307 2.6025198229
## Anthony Edwards 1.40251473 2.6178813170
## CJ McCollum 3.90112719 0.5500261983
## Cole Aldrich -2.78307370 -3.9300845772
## Brandon Knight 3.54151495 2.0093567331
## Kemba Walker 3.18777727 0.5896508288
## De'Andre Hunter 0.77514281 -2.0904369415
## Al-Farouq Aminu -1.02295866 0.6557701582
## Cody Zeller -2.02496946 0.5680230150
## Doug McDermott 1.01124840 -1.4023518631
## Bam Adebayo -5.99686501 1.1625539440
## Dereck Lively II -5.06532407 -4.9338352381
## Obi Toppin -3.01791562 -0.9098400603
## Jamal Murray 2.37633667 1.7128111826
## Luke Kennard 2.76829877 -0.9772815337
## Keegan Murray -1.59473589 -0.7127863270
## Justise Winslow 0.36808543 0.0140660045
## James Bouknight 1.52674532 -0.8368198759
## Jeremy Lamb 0.50899046 -2.6527408299
## Jalen Smith -2.10897015 -1.8201712718
## Trey Burke 3.60982350 0.5620071724
## Gordon Hayward 0.68966882 -0.8690154013
## Zach Collins -2.23468578 -2.2257336106
## Onyeka Okongwu -5.79890613 2.6047417619
## Rui Hachimura -0.89990828 -2.6695528608
## Jimmer Fredette 4.12885438 -0.1532925420
## Joel Embiid -4.49751364 -0.3052975967
## Jalen Suggs 1.57680464 1.3826079667
## Trae Young 6.66738911 7.9875515113
## Denzel Valentine 3.08887719 -3.2154062834
## Shabazz Muhammad -0.36507564 2.1338042145
## Devin Booker 1.28579904 -3.5063787159
## Michael Carter-Williams 2.91272861 -0.8684079705
## D'Angelo Russell 2.98270993 2.6314533501
## Thomas Robinson -0.41276682 -2.4752367530
## Markieff Morris -1.29699896 -4.0373945397
## Cameron Payne 4.48285978 1.3236954751
## Damian Lillard 4.29694802 0.3843340708
## Brandon Miller 2.00076132 1.9889065138
## Gradey Dick 1.44740283 -1.2927758040
## Terrence Ross 1.18817176 -2.5860223513
## Joshua Primo 1.55551858 -3.7707023835
## Buddy Hield 3.54777620 -1.4535258538
## Nerlens Noel -5.45383914 0.7991793564
## Scottie Barnes 0.41005879 -0.0002825293
## Devin Vassell 0.72773722 -4.1046446591
## Davion Mitchell 3.51406172 -1.6468468321
## Dion Waiters 1.92316128 -2.9143303356
## Jakob Poeltl -3.07463033 -1.0055357287
## Tyrese Haliburton 2.28374305 -2.7467761148
## Collin Sexton 2.52183153 3.9991906129
## Taurean Prince 1.44792106 -3.7467662602
## Jalen Williams 1.80913049 -1.6958213307
## Jaxson Hayes -6.10602196 -1.9333235932
## Paul George 2.10747433 -0.1876406410
## Frank Kaminsky 0.20730366 -4.1181677131
## Jaylen Brown 0.86593793 2.2952001302
## Lauri Markkanen 0.18748839 -0.1368727294
## Shai Gilgeous-Alexander 2.04551424 2.3234644917
## Victor Oladipo 0.63903222 -2.5888631078
## Kelly Olynyk -0.73492325 -3.8811181762
## Domantas Sabonis -1.67699707 -1.2190984214
## Kevin Knox 0.63604976 0.7501807107
## Mo Bamba -5.12168936 0.6472841174
## Julius Randle -2.20418418 3.0357101236
## Otto Porter Jr. -0.04885497 -1.8356037020
## Nik Stauskas 3.13687982 -1.8802991525
## Patrick Williams 0.12807727 -1.3341060184
## Kendall Marshall 4.60320232 -1.8524037126
## Aaron Gordon -2.71643469 0.5168844660
## Kira Lewis Jr. 3.26278428 0.0954317440
## Jabari Smith Jr. 1.62886867 1.4477810399
## Isaac Okoro -0.42993503 0.2975331821
## Jaden Ivey 1.86477689 -0.0773337674
## Donovan Mitchell 3.15753551 -1.8102604526
## Franz Wagner 1.24385093 -2.2215940450
## Josh Jackson -1.48957168 3.0475464620
## Derrick Favors -5.01881131 0.5372658952
## Jabari Parker -2.40112005 3.8466488862
## Taylor Hendricks -1.15461505 -0.2239571117
## Meyers Leonard -2.74024056 -3.6508169349
## Marvin Bagley III -5.65427831 4.5448753922
## Cade Cunningham 2.90919016 4.4345298643
## T.J. Warren -1.08912829 0.3669642309
## Evan Mobley -4.26687599 2.6519044433
## De'Aaron Fox 0.87066774 3.3180112387
## Lonzo Ball 0.30972840 -0.0007053551
## Elfrid Payton 2.00506164 0.8904543832
## John Wall 1.73494610 3.9455798842
## Klay Thompson 3.84131304 -0.2800845197
## Myles Turner -1.86401362 -1.8030642874
## Jerome Robinson 3.05834301 -0.2586574385
## Anthony Davis -6.73630616 1.2629548886
## Jalen Duren -5.98165537 0.5776256581
## Tristan Thompson -4.60556911 2.1556621458
## Noah Vonleh -1.11130925 0.1751834144
## Jahlil Okafor -5.47754196 2.7772955807
## Bennedict Mathurin 1.07663046 -1.0805356809
## Mikal Bridges 1.20162590 -3.5903451628
## Xavier Henry 1.24518982 -0.8915202286
## Ekpe Udoh -2.87709283 2.0867754797
## Chet Holmgren -4.15750678 -0.0605690899
## Coby White 3.35981962 0.9338304491
## DeMarcus Cousins -4.42466023 2.5278733794
## Cam Reddish 4.14985479 -0.0842589880
## Michael Kidd-Gilchrist -1.49000302 0.5168492854
## Dennis Smith Jr. 2.43263222 4.0080211081
## Kentavious Caldwell-Pope 2.25315004 -0.8810443210
## Zach LaVine 1.60422247 -2.6481909219
## Karl-Anthony Towns -2.08103954 -1.1196065654
## Jeremy Sochan -0.86229364 -1.9652843807
## Ziaire Williams 1.78971475 -0.2486334187
## Wendell Carter Jr. -3.36498747 0.7216499359
## Brandon Ingram 1.66287033 1.8607679236
## Andre Drummond -5.95363392 -0.5511351054
## Austin Rivers 2.95329868 1.3791937733
## RJ Barrett 0.08315247 5.0528618008
## Aaron Nesmith 3.28346405 -1.6855751483
## Jonathan Isaac -1.48927680 -0.4512240082
## Ben McLemore -0.24239908 0.1870327074
## Deandre Ayton -6.32878662 3.9424880892
## Romeo Langford 0.73466257 2.2444498661
## Jayson Tatum 0.78010288 2.6444271533
## Kyrie Irving 3.77122159 3.0033308087
## Moses Moody 1.46960252 1.4190061757
## Miles Bridges 0.51099544 -0.8957909493
## Kris Dunn 2.69748838 -0.0951825819
## P.J. Washington -0.33949853 -1.0835362435
## Tyler Herro 1.81846173 -0.4356215772
## Jarrett Culver 1.51221150 -0.0681535683
## Zion Williamson -5.07447989 4.6504795504
## Anthony Black 1.20227004 1.7148572319
## Cameron Johnson 1.47106716 -2.1704255345
## Ed Davis -3.31971345 -3.1931177330
## Jarace Walker -1.30148259 -1.2591517315
## Harrison Barnes 1.11071750 -0.4675508975
fviz_nbclust(df_cluster, kmeans, method = 'wss')
fviz_nbclust(df_cluster, kmeans, method = 'silhouette')
fviz_nbclust(df_cluster, kmeans, method = 'gap_stat')
k <- 15
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(df_cluster, centers = 5, nstart = 50)
df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 9, 1, 8, 9, 8, 16, 7, 18, 25, 6, 18, 1, 14, 13, 12
##
## Cluster means:
## PC1 PC2
## 1 -5.6518151 0.2441231
## 2 -3.5577873 6.3959006
## 3 -5.2038873 3.2319150
## 4 -3.1225646 -3.5500581
## 5 2.6116359 3.9220472
## 6 -1.7554870 -1.6598201
## 7 -1.0651259 3.2120996
## 8 0.9023738 -3.0037819
## 9 1.1995276 -0.4390785
## 10 -3.1633860 0.7804591
## 11 1.5229678 1.9259630
## 12 6.6673891 7.9875515
## 13 3.6051291 0.4968607
## 14 3.0891886 -1.8536157
## 15 -1.0961484 0.4205491
##
## Clustering vector:
## Anthony Bennett Marquese Chriss Patrick Patterson
## 10 10 6
## Jaren Jackson Jr. Cason Wallace Bradley Beal
## 6 9 9
## Markelle Fultz Evan Turner Steven Adams
## 5 9 4
## John Henson Malik Monk Johnny Davis
## 4 11 9
## Stanley Johnson Ben Simmons Ochai Agbaji
## 11 2 8
## Alec Burks Ja Morant Marcus Morris
## 11 5 6
## Chris Duarte Trey Lyles Wes Johnson
## 9 6 15
## Willie Cauley-Stein Paolo Banchero Marcus Smart
## 4 7 11
## Alex Len Jett Howard Greg Monroe
## 4 14 15
## Derrick Williams Jordan Hawkins Andrew Wiggins
## 15 14 7
## Anthony Edwards CJ McCollum Cole Aldrich
## 11 13 4
## Brandon Knight Kemba Walker De'Andre Hunter
## 13 13 8
## Al-Farouq Aminu Cody Zeller Doug McDermott
## 15 15 9
## Bam Adebayo Dereck Lively II Obi Toppin
## 1 4 6
## Jamal Murray Luke Kennard Keegan Murray
## 11 14 6
## Justise Winslow James Bouknight Jeremy Lamb
## 9 9 8
## Jalen Smith Trey Burke Gordon Hayward
## 6 13 9
## Zach Collins Onyeka Okongwu Rui Hachimura
## 6 3 6
## Jimmer Fredette Joel Embiid Jalen Suggs
## 13 1 11
## Trae Young Denzel Valentine Shabazz Muhammad
## 12 14 7
## Devin Booker Michael Carter-Williams D'Angelo Russell
## 8 14 5
## Thomas Robinson Markieff Morris Cameron Payne
## 8 4 13
## Damian Lillard Brandon Miller Gradey Dick
## 13 11 9
## Terrence Ross Joshua Primo Buddy Hield
## 8 8 14
## Nerlens Noel Scottie Barnes Devin Vassell
## 1 9 8
## Davion Mitchell Dion Waiters Jakob Poeltl
## 14 8 6
## Tyrese Haliburton Collin Sexton Taurean Prince
## 14 5 8
## Jalen Williams Jaxson Hayes Paul George
## 14 1 9
## Frank Kaminsky Jaylen Brown Lauri Markkanen
## 8 11 9
## Shai Gilgeous-Alexander Victor Oladipo Kelly Olynyk
## 11 8 8
## Domantas Sabonis Kevin Knox Mo Bamba
## 6 9 1
## Julius Randle Otto Porter Jr. Nik Stauskas
## 7 8 14
## Patrick Williams Kendall Marshall Aaron Gordon
## 9 14 10
## Kira Lewis Jr. Jabari Smith Jr. Isaac Okoro
## 13 11 15
## Jaden Ivey Donovan Mitchell Franz Wagner
## 9 14 8
## Josh Jackson Derrick Favors Jabari Parker
## 7 1 7
## Taylor Hendricks Meyers Leonard Marvin Bagley III
## 15 4 3
## Cade Cunningham T.J. Warren Evan Mobley
## 5 15 3
## De'Aaron Fox Lonzo Ball Elfrid Payton
## 11 9 11
## John Wall Klay Thompson Myles Turner
## 5 13 6
## Jerome Robinson Anthony Davis Jalen Duren
## 13 1 1
## Tristan Thompson Noah Vonleh Jahlil Okafor
## 3 15 3
## Bennedict Mathurin Mikal Bridges Xavier Henry
## 9 8 9
## Ekpe Udoh Chet Holmgren Coby White
## 10 10 13
## DeMarcus Cousins Cam Reddish Michael Kidd-Gilchrist
## 3 13 15
## Dennis Smith Jr. Kentavious Caldwell-Pope Zach LaVine
## 5 9 8
## Karl-Anthony Towns Jeremy Sochan Ziaire Williams
## 6 6 9
## Wendell Carter Jr. Brandon Ingram Andre Drummond
## 10 11 1
## Austin Rivers RJ Barrett Aaron Nesmith
## 13 7 14
## Jonathan Isaac Ben McLemore Deandre Ayton
## 15 15 3
## Romeo Langford Jayson Tatum Kyrie Irving
## 11 11 5
## Moses Moody Miles Bridges Kris Dunn
## 11 9 13
## P.J. Washington Tyler Herro Jarrett Culver
## 6 9 9
## Zion Williamson Anthony Black Cameron Johnson
## 3 11 8
## Ed Davis Jarace Walker Harrison Barnes
## 4 6 9
##
## Within cluster sum of squares by cluster:
## [1] 11.979596 0.000000 10.588637 13.335329 11.493769 15.484948 11.267129
## [8] 19.035413 19.986620 4.094306 12.092124 0.000000 10.282343 10.612227
## [15] 5.400759
## (between_SS / total_SS = 93.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
fviz_cluster(df_cbb.kmeans, df_cluster, labelsize = 4, pointsize = 1, show.clust.cent = FALSE, repel = TRUE,
xlab = "Dimension 1", ylab = "Dimension 2", main = "Clustering with K-means") + scale_y_reverse()
cluster_assignments <- df_cbb.kmeans$cluster
cluster_df15 <- data.frame(value = cluster_assignments, name = names(cluster_assignments))
cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_df10 <- data.frame(value = cluster_assignments2, name = names(cluster_assignments2))
cluster_assignments3 <- df_cbb.kmeans3$cluster
cluster_df5 <- data.frame(value = cluster_assignments3, name = names(cluster_assignments3))
cluster_df15 <- as.tibble(cluster_df15)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cluster_df10 <- as.tibble(cluster_df10)
cluster_df5 <- as.tibble(cluster_df5)
cluster_df15 <- cluster_df15 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df10 <- cluster_df10 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df5 <- cluster_df5 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df15
## # A tibble: 165 × 2
## name pc_cluster
## <chr> <int>
## 1 Anthony Bennett 10
## 2 Marquese Chriss 10
## 3 Patrick Patterson 6
## 4 Jaren Jackson Jr. 6
## 5 Cason Wallace 9
## 6 Bradley Beal 9
## 7 Markelle Fultz 5
## 8 Evan Turner 9
## 9 Steven Adams 4
## 10 John Henson 4
## # ℹ 155 more rows
df_cbb.sample$group <- df_cbb.kmeans$cluster
df_cbb.sample <- df_cbb.sample |> relocate(group, .before = dunk_made)
head(df_cbb.sample)
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Anthony Bennett 10 1.5142857 1.6571429 0.914 2.857143 4.000000
## Marquese Chriss 10 1.5294118 1.9117647 0.800 3.000000 4.264706
## Patrick Patterson 6 0.6701031 0.7216495 0.929 1.402062 1.701031
## Jaren Jackson Jr. 6 0.8857143 0.8857143 1.000 1.742857 2.657143
## Cason Wallace 9 0.3437500 0.3437500 1.000 1.625000 2.281250
## Bradley Beal 9 0.4864865 0.5405405 0.900 2.405405 3.702703
## rim_pct rim_asted other2pt_made other2pt_attempts
## Anthony Bennett 0.714 0.620 1.6000000 3.685714
## Marquese Chriss 0.703 0.529 1.6470588 3.911765
## Patrick Patterson 0.824 0.647 0.5154639 1.309278
## Jaren Jackson Jr. 0.656 0.459 0.5714286 1.228571
## Cason Wallace 0.712 0.096 1.3437500 3.500000
## Bradley Beal 0.650 0.348 0.6216216 1.891892
## other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Anthony Bennett 0.434 0.768 0.587 1.0 2.7
## Marquese Chriss 0.421 0.589 0.568 0.6 1.8
## Patrick Patterson 0.394 0.600 0.604 0.2 0.8
## Jaren Jackson Jr. 0.465 0.600 0.596 1.1 2.7
## Cason Wallace 0.384 0.093 0.514 1.4 4.0
## Bradley Beal 0.329 0.304 0.541 1.7 5.0
## fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Anthony Bennett 0.972 0.3703704 35 3.5 5.1
## Marquese Chriss 0.952 0.3333333 34 2.6 3.8
## Patrick Patterson 0.957 0.2500000 97 3.2 4.3
## Jaren Jackson Jr. 0.974 0.4074074 35 3.0 3.8
## Cason Wallace 0.818 0.3500000 32 1.7 2.2
## Bradley Beal 0.905 0.3400000 37 3.6 4.7
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Anthony Bennett 1.0 2.5 5.7 0.7 1.2
## Marquese Chriss 0.8 2.5 2.9 0.9 1.6
## Patrick Patterson 1.5 2.9 5.3 0.7 1.6
## Jaren Jackson Jr. 1.1 1.5 4.3 0.6 3.0
## Cason Wallace 4.3 0.9 2.8 2.0 0.5
## Bradley Beal 2.2 1.4 5.4 1.4 0.8
## tov_per_g pts_per_g_college
## Anthony Bennett 1.9 16.1
## Marquese Chriss 2.0 13.7
## Patrick Patterson 1.6 16.1
## Jaren Jackson Jr. 1.8 10.9
## Cason Wallace 2.1 11.7
## Bradley Beal 2.1 14.8
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), mean)) |> print(n = 15, width = Inf)
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2.09 2.20 0.955 3.54 4.56 0.781
## 2 2 1.70 1.85 0.918 4.82 6.67 0.723
## 3 3 2.06 2.18 0.946 4.94 6.46 0.764
## 4 4 0.745 0.791 0.943 1.45 1.98 0.747
## 5 5 0.387 0.428 0.770 2.70 4.35 0.624
## 6 6 0.660 0.716 0.915 1.94 2.65 0.735
## 7 7 1.17 1.28 0.915 3.34 5.06 0.662
## 8 8 0.289 0.323 0.916 1.10 1.63 0.683
## 9 9 0.435 0.480 0.916 1.63 2.44 0.671
## 10 10 1.43 1.55 0.930 3.03 4.19 0.725
## 11 11 0.468 0.514 0.912 2.30 3.65 0.633
## 12 12 0 0 0 3.28 6.28 0.522
## 13 13 0.108 0.125 0.899 1.18 1.98 0.602
## 14 14 0.137 0.160 0.802 0.865 1.40 0.623
## 15 15 0.823 0.890 0.929 2.45 3.62 0.678
## rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.647 0.922 2.70 0.344 0.498 0.618
## 2 0.484 1.67 4.88 0.342 0.309 0.561
## 3 0.546 1.62 3.88 0.424 0.376 0.63
## 4 0.698 0.608 1.57 0.357 0.626 0.580
## 5 0.221 1.52 4.03 0.37 0.083 0.509
## 6 0.583 0.884 2.10 0.423 0.474 0.595
## 7 0.440 1.90 5.17 0.365 0.291 0.511
## 8 0.492 0.647 1.58 0.401 0.346 0.548
## 9 0.404 0.909 2.41 0.375 0.262 0.525
## 10 0.543 1.47 3.73 0.401 0.463 0.583
## 11 0.309 1.31 3.56 0.362 0.161 0.501
## 12 0.114 1.19 2.78 0.427 0.026 0.493
## 13 0.238 0.887 2.31 0.380 0.186 0.476
## 14 0.279 0.573 1.43 0.397 0.136 0.500
## 15 0.478 0.875 2.38 0.357 0.359 0.564
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0667 0.278 0.317 0.0549 32.3 2.67 4.22
## 2 0 0.1 1 0 33 6 9
## 3 0.262 0.812 0.578 0.168 34.2 3.76 6.05
## 4 0.0444 0.211 0.444 0.0648 76.2 1.66 2.72
## 5 1.75 4.56 0.479 0.379 33.1 4.84 6.25
## 6 0.5 1.44 0.847 0.340 60.4 2.72 3.8
## 7 1.1 3.21 0.891 0.331 36.3 4.13 5.86
## 8 1.17 3.12 0.876 0.405 80.9 1.91 2.54
## 9 1.54 4.21 0.784 0.362 53.4 2.87 3.72
## 10 0.667 1.82 0.921 0.356 35.3 2.8 4.28
## 11 1.52 4.29 0.731 0.342 41.6 4.07 5.38
## 12 3.7 10.3 0.263 0.359 32 7.4 8.6
## 13 1.95 5.3 0.614 0.367 78.4 3.69 4.69
## 14 1.8 4.67 0.750 0.381 74.5 2.23 2.81
## 15 0.8 2.17 0.808 0.320 47.8 3.59 4.92
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.956 2.83 5.59 1 2.89 1.73
## 2 4.8 3.1 8.6 2 0.8 3.4
## 3 1.54 3.54 5.84 1.02 1.98 2.16
## 4 0.844 2.24 4.36 0.544 1.96 1.3
## 5 5.4 0.95 4 1.52 0.55 3.32
## 6 1.23 2.1 4.68 0.7 1.39 1.55
## 7 2.2 2.43 5.14 1.03 0.786 2.44
## 8 1.51 1.21 3.24 0.95 0.544 1.32
## 9 2.36 1.30 4.33 1.22 0.58 2.05
## 10 1.73 2.67 5.72 0.817 2.22 1.93
## 11 2.94 1.33 4.29 1.38 0.633 2.47
## 12 8.7 0.4 3.5 1.7 0.3 5.2
## 13 3.81 0.693 3.34 1.41 0.364 2.7
## 14 3.28 0.808 3.08 1.15 0.423 1.76
## 15 1.59 2.31 5.01 1.15 1.18 2.12
## pts_per_g_college
## <dbl>
## 1 11.8
## 2 19.2
## 3 17.7
## 4 8.36
## 5 19.1
## 6 12.3
## 7 17.9
## 8 11.0
## 9 14.4
## 10 14.0
## 11 16.6
## 12 27.4
## 13 16.9
## 14 12.4
## 15 14.7
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), median))
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2.31 2.34 0.956 3.63 4.77 0.768
## 2 2 1.70 1.85 0.918 4.82 6.67 0.723
## 3 3 2.01 2.13 0.953 5.05 6.59 0.774
## 4 4 0.657 0.705 0.944 1.17 1.54 0.752
## 5 5 0.345 0.405 0.91 2.66 4.33 0.628
## 6 6 0.566 0.641 0.932 1.87 2.59 0.728
## 7 7 1.05 1.13 0.924 3.3 4.92 0.67
## 8 8 0.285 0.299 0.907 1.09 1.53 0.691
## 9 9 0.453 0.453 0.925 1.62 2.45 0.667
## 10 10 1.51 1.61 0.951 3.12 4.13 0.708
## 11 11 0.447 0.457 0.916 2.22 3.71 0.640
## 12 12 0 0 0 3.28 6.28 0.522
## 13 13 0.108 0.108 0.966 1.04 1.70 0.594
## 14 14 0.136 0.167 0.875 0.807 1.35 0.623
## 15 15 0.806 0.885 0.938 2.43 3.58 0.68
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## # fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## # ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## # blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
df_career_stats |> filter(player == "Jaylen Brown") |>
select(player, pick_overall, year, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## # A tibble: 1 × 8
## player pick_overall year pts_per_g trb_per_g ast_per_g vorp g
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Jaylen Brown 3 2016 18.6 5.3 2.4 9.7 540
df_career_stats |> filter(year >= 2010)|>
group_by(pick_overall) |>
summarize(across(everything(), function(x) mean(x))) |>
select(pick_overall, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## Warning: There were 122 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) mean(x))`.
## ℹ In group 1: `pick_overall = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 121 remaining warnings.
## # A tibble: 61 × 6
## pick_overall pts_per_g trb_per_g ast_per_g vorp g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.9 6.65 4.28 14.2 380.
## 2 2 14.7 5.14 3.19 3.91 338.
## 3 3 17.4 6.58 3.31 12.0 420.
## 4 4 12 5.32 1.91 3 372.
## 5 5 12.5 4.61 3.36 4.84 377.
## 6 6 9.92 4.5 2.17 6.57 346.
## 7 7 12.4 5.08 2.51 3.77 430.
## 8 8 9.21 3.31 1.91 1.71 390.
## 9 9 10.3 4.76 2.31 5.61 418.
## 10 10 9.74 3.46 2.13 5.14 370.
## # ℹ 51 more rows
df_cluster2 <- df_cbb_scaled
df_cluster2$name <- rownames(df_cbb)
df_cluster2 <- df_cluster2 |> relocate(name, .before = dunk_made)
df_cluster2 <- column_to_rownames(df_cluster2, var = "name")
k1 <- 15
k2 <- 10
k3 <- 5
k15 <- kmeans(df_cluster2, centers = k1, nstart = 50)
k10 <- kmeans(df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(df_cluster2, centers = k3, nstart = 50)
temp_assign <- k15$cluster
combine <- function(df, kmeans) {
temp_assign <- kmeans$cluster
df_temp <- as.tibble(data.frame(value = temp_assign, name = names(temp_assign)))
if(!("all_cluster" %in% colnames(df_temp))) {
df_temp <- df_temp |> rename(all_cluster = value)
}
cluster_df <- left_join(df, df_temp, by = c("name"))
cluster_df <- cluster_df |> relocate(pc_cluster, .after = all_cluster)
# cluster_df <- cluster_df |> group_by(all_cluster, pc_cluster) |> summarize(n = n())
# return(cluster_df)
return(cluster_df)
}
groups15 <- combine(cluster_df15, k15) |> arrange(pc_cluster, all_cluster)
groups10 <- combine(cluster_df10, k10)
groups5 <- combine(cluster_df5, k5)
groups15
## # A tibble: 165 × 3
## name all_cluster pc_cluster
## <chr> <int> <int>
## 1 Mo Bamba 3 1
## 2 Anthony Davis 3 1
## 3 Bam Adebayo 11 1
## 4 Nerlens Noel 11 1
## 5 Jaxson Hayes 11 1
## 6 Derrick Favors 11 1
## 7 Jalen Duren 11 1
## 8 Andre Drummond 11 1
## 9 Joel Embiid 15 1
## 10 Ben Simmons 7 2
## # ℹ 155 more rows
df_cbb <- df_cbb |> rownames_to_column(var = "name")
df_groups <- left_join(df_cbb, groups15, by = c("name"))
df_cbb <- df_cbb |> column_to_rownames(var = "name")
library(ggforce)
df_groups <- df_groups |>
mutate(bust = if_else(name %in% bust_list, 1, 0),
good = if_else(name %in% good_list, 1, 0))
df_groups |> group_by(all_cluster) |>
summarize(n = n(),
across(everything(), mean),
ratio = good / bust) |>
select(-name) |>
print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `all_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
## all_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 0.433 0.481 0.902 1.98 3.01
## 2 2 9 0.543 0.589 0.896 3.14 5.06
## 3 3 3 2.12 2.17 0.979 3.57 4.34
## 4 4 22 0.684 0.743 0.920 1.88 2.70
## 5 5 2 0 0 0 1.42 2.14
## 6 6 1 0 0 0 3.28 6.28
## 7 7 4 2.30 2.44 0.942 5.86 7.67
## 8 8 22 0.263 0.294 0.887 1.10 1.78
## 9 9 14 1.28 1.39 0.925 3.13 4.45
## 10 10 7 0.411 0.445 0.934 1.88 2.72
## 11 11 10 1.96 2.09 0.941 3.73 5.00
## 12 12 12 0.0783 0.0854 0.932 1.11 1.83
## 13 13 14 0.376 0.430 0.896 1.23 1.72
## 14 14 19 0.466 0.512 0.907 1.98 3.13
## 15 15 11 0.780 0.823 0.947 1.78 2.40
## rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.663 0.314 0.574 1.92 0.290 0.176
## 2 0.618 0.228 1.62 4.33 0.366 0.0983
## 3 0.825 0.556 0.964 2.55 0.391 0.281
## 4 0.700 0.548 0.884 2.14 0.414 0.474
## 5 0.664 0.062 0.788 1.82 0.438 0.024
## 6 0.522 0.114 1.19 2.78 0.427 0.026
## 7 0.763 0.529 1.58 3.84 0.428 0.402
## 8 0.621 0.407 0.585 1.63 0.357 0.272
## 9 0.707 0.497 1.65 4.31 0.382 0.409
## 10 0.698 0.300 0.731 1.71 0.449 0.104
## 11 0.745 0.621 1.06 2.90 0.366 0.456
## 12 0.613 0.214 0.907 2.25 0.402 0.181
## 13 0.719 0.538 0.733 1.69 0.438 0.313
## 14 0.637 0.372 1.65 4.32 0.382 0.220
## 15 0.750 0.681 0.685 1.80 0.357 0.606
## fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.514 0.947 2.89 0.724 0.313 61.6 3.55 5.07
## 2 0.508 1.41 4 0.582 0.349 36.7 4.62 6.17
## 3 0.664 0.633 1.83 0.879 0.296 34 2.83 4.07
## 4 0.576 0.955 2.43 0.893 0.434 59.3 3.09 4.09
## 5 0.526 1.15 2.65 0.572 0.423 42 3.65 4.35
## 6 0.493 3.7 10.3 0.263 0.359 32 7.4 8.6
## 7 0.648 0.425 1.28 0.905 0.252 33.5 4.47 6.75
## 8 0.496 1.83 4.94 0.780 0.369 65.6 2.28 2.87
## 9 0.550 0.629 1.8 0.940 0.308 38.9 3.47 5.12
## 10 0.584 1.53 4.03 0.717 0.370 41.9 1.86 2.57
## 11 0.610 0 0.02 0 0 36.7 2.9 4.79
## 12 0.488 1.98 5.3 0.602 0.371 83.5 3.82 4.78
## 13 0.570 0.857 2.33 0.924 0.356 82.9 2.04 2.77
## 14 0.489 1.82 4.98 0.777 0.359 40.2 3.83 4.91
## 15 0.589 0.0182 0.182 0.432 0.0341 64.6 1.97 3.15
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.27 1.51 4.41 1.6 0.753 2.68
## 2 5.32 1.02 4.12 1.46 0.522 3.28
## 3 1.23 2.67 7.6 1 4.03 1.47
## 4 1.31 2.06 5.18 0.764 1.31 1.69
## 5 6.15 0.35 2.5 1.3 0.3 2.55
## 6 8.7 0.4 3.5 1.7 0.3 5.2
## 7 2.5 3.5 7.32 1.38 1.35 2.52
## 8 1.91 0.982 3.33 1.07 0.409 1.66
## 9 1.84 2.78 5.43 1.04 1.54 2.16
## 10 4.57 0.914 3.54 1.8 0.571 2.26
## 11 1.04 3.02 4.88 1 2.27 1.9
## 12 3.62 0.7 3.3 1.35 0.35 2.48
## 13 1.32 1.39 3.24 0.871 0.643 1.34
## 14 2.2 1.45 4.28 1.06 0.6 2.18
## 15 0.927 2.28 4.5 0.564 1.95 1.4
## pts_per_g_college pc_cluster bust good ratio
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13.9 11.3 0 0.333 Inf
## 2 18.8 6.56 0 0.444 Inf
## 3 13.7 4 0 0.667 Inf
## 4 13.9 8.77 0.136 0.409 3
## 5 12.4 9.5 0.5 0.5 1
## 6 27.4 12 0 1 Inf
## 7 20.7 2.75 0 0.5 Inf
## 8 12.9 10.7 0.182 0.273 1.5
## 9 15.4 9.21 0.143 0.357 2.5
## 10 12.6 10.7 0 0.429 Inf
## 11 13.1 3 0.1 0.5 5
## 12 17.4 13.1 0.0833 0.5 6
## 13 10.7 7.36 0 0.286 Inf
## 14 17.1 9.95 0.158 0.316 2
## 15 9.08 4.09 0 0.455 Inf
df_pc <- left_join(df_cluster |> rownames_to_column("name"), groups15, by = "name")
df_pc <- left_join(df_pc, df_groups |> select(name, bust, good), by = "name")
df_pc <- df_pc |> column_to_rownames("name")
df_pc <- df_pc |>
mutate(
pc_cluster = as.factor(pc_cluster),
all_cluster = as.factor(all_cluster)
)
hulls <- df_pc |>
group_by(all_cluster) |>
slice(chull(PC1, PC2))
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = all_cluster)) +
geom_polygon(data = hulls,
aes(group = all_cluster, color = all_cluster, fill = all_cluster),
alpha = 0.2) +
geom_point() +
geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) +
geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
scale_x_reverse() +
labs(title = "Clusters using higher dimensional data",
x = "Dimension 1",
y = "Dimension 2",
color = "Cluster",
fill = "Cluster"
)
p
p2 <- p +
geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
aes(label = name),
size = 1.6,
max.overlaps = 20,
fill = NA,
label.size = NA,
segment.size = 0.2
)
p2
df_groups |> group_by(pc_cluster) |>
summarize(n = n(),
across(everything(), mean),
ratio = good / bust) |>
select(-name) |>
print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `pc_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
## pc_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 9 2.09 2.20 0.955 3.54 4.56
## 2 2 1 1.70 1.85 0.918 4.82 6.67
## 3 3 8 2.06 2.18 0.946 4.94 6.46
## 4 4 9 0.745 0.791 0.943 1.45 1.98
## 5 5 8 0.387 0.428 0.770 2.70 4.35
## 6 6 16 0.660 0.716 0.915 1.94 2.65
## 7 7 7 1.17 1.28 0.915 3.34 5.06
## 8 8 18 0.289 0.323 0.916 1.10 1.63
## 9 9 25 0.435 0.480 0.916 1.63 2.44
## 10 10 6 1.43 1.55 0.930 3.03 4.19
## 11 11 18 0.468 0.514 0.912 2.30 3.65
## 12 12 1 0 0 0 3.28 6.28
## 13 13 14 0.108 0.125 0.899 1.18 1.98
## 14 14 13 0.137 0.160 0.802 0.865 1.40
## 15 15 12 0.823 0.890 0.929 2.45 3.62
## rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.781 0.647 0.922 2.70 0.344 0.498
## 2 0.723 0.484 1.67 4.88 0.342 0.309
## 3 0.764 0.546 1.62 3.88 0.424 0.376
## 4 0.747 0.698 0.608 1.57 0.357 0.626
## 5 0.624 0.221 1.52 4.03 0.37 0.083
## 6 0.735 0.583 0.884 2.10 0.423 0.474
## 7 0.662 0.440 1.90 5.17 0.365 0.291
## 8 0.683 0.492 0.647 1.58 0.401 0.346
## 9 0.671 0.404 0.909 2.41 0.375 0.262
## 10 0.725 0.543 1.47 3.73 0.401 0.463
## 11 0.633 0.309 1.31 3.56 0.362 0.161
## 12 0.522 0.114 1.19 2.78 0.427 0.026
## 13 0.602 0.238 0.887 2.31 0.380 0.186
## 14 0.623 0.279 0.573 1.43 0.397 0.136
## 15 0.678 0.478 0.875 2.38 0.357 0.359
## fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.618 0.0667 0.278 0.317 0.0549 32.3 2.67 4.22
## 2 0.561 0 0.1 1 0 33 6 9
## 3 0.63 0.262 0.812 0.578 0.168 34.2 3.76 6.05
## 4 0.580 0.0444 0.211 0.444 0.0648 76.2 1.66 2.72
## 5 0.509 1.75 4.56 0.479 0.379 33.1 4.84 6.25
## 6 0.595 0.5 1.44 0.847 0.340 60.4 2.72 3.8
## 7 0.511 1.1 3.21 0.891 0.331 36.3 4.13 5.86
## 8 0.548 1.17 3.12 0.876 0.405 80.9 1.91 2.54
## 9 0.525 1.54 4.21 0.784 0.362 53.4 2.87 3.72
## 10 0.583 0.667 1.82 0.921 0.356 35.3 2.8 4.28
## 11 0.501 1.52 4.29 0.731 0.342 41.6 4.07 5.38
## 12 0.493 3.7 10.3 0.263 0.359 32 7.4 8.6
## 13 0.476 1.95 5.3 0.614 0.367 78.4 3.69 4.69
## 14 0.500 1.8 4.67 0.750 0.381 74.5 2.23 2.81
## 15 0.564 0.8 2.17 0.808 0.320 47.8 3.59 4.92
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.956 2.83 5.59 1 2.89 1.73
## 2 4.8 3.1 8.6 2 0.8 3.4
## 3 1.54 3.54 5.84 1.02 1.98 2.16
## 4 0.844 2.24 4.36 0.544 1.96 1.3
## 5 5.4 0.95 4 1.52 0.55 3.32
## 6 1.23 2.1 4.68 0.7 1.39 1.55
## 7 2.2 2.43 5.14 1.03 0.786 2.44
## 8 1.51 1.21 3.24 0.95 0.544 1.32
## 9 2.36 1.30 4.33 1.22 0.58 2.05
## 10 1.73 2.67 5.72 0.817 2.22 1.93
## 11 2.94 1.33 4.29 1.38 0.633 2.47
## 12 8.7 0.4 3.5 1.7 0.3 5.2
## 13 3.81 0.693 3.34 1.41 0.364 2.7
## 14 3.28 0.808 3.08 1.15 0.423 1.76
## 15 1.59 2.31 5.01 1.15 1.18 2.12
## pts_per_g_college all_cluster bust good ratio
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 11.8 9.67 0 0.667 Inf
## 2 19.2 7 0 1 Inf
## 3 17.7 9 0.125 0.5 4
## 4 8.36 14.8 0 0.333 Inf
## 5 19.1 3.88 0 0.5 Inf
## 6 12.3 7.06 0 0.5 Inf
## 7 17.9 9.43 0 0.143 Inf
## 8 11.0 10.1 0.0556 0.278 5
## 9 14.4 7.16 0.2 0.36 1.8
## 10 14.0 8 0.333 0.333 1
## 11 16.6 9.56 0.0556 0.444 8
## 12 27.4 6 0 1 Inf
## 13 16.9 11.1 0.0714 0.429 6
## 14 12.4 7.85 0.154 0.308 2
## 15 14.7 5.08 0.167 0.167 1
hulls <- df_pc |>
group_by(pc_cluster) |>
slice(chull(PC1, PC2))
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) +
geom_polygon(data = hulls,
aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster),
alpha = 0.2) +
geom_point() +
geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) +
geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
scale_x_reverse() +
labs(title = "Clusters using higher dimensional data",
x = "Dimension 1",
y = "Dimension 2",
color = "Cluster",
fill = "Cluster"
)
p
p2 <- p +
geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
aes(label = name),
size = 1.6,
max.overlaps = 20,
fill = NA,
label.size = NA,
segment.size = 0.2
)
p2